#!/usr/bin/env python
# encoding: utf-8
from bs4 import BeautifulSoup
from mongo_pipeline import MongoDBPipeline
from request import Request
from spider import Spider
from bson.objectid import ObjectId
from console_pipeline import ConsolePipeline
import time
import datetime


class MyProcess(object):
    """Page processor for the kx1d.com "neihanmanhua" comic listing.

    An instance is passed to ``Spider``. ``process`` parses a listing page
    and yields one ``Request`` per entry; ``process_detail`` parses an
    article page and yields one item dict per image found.

    Attributes:
        start_url: generator of the listing page URLs to crawl.
        useProxy: proxy flag; presumably read by the spider framework
            (TODO confirm against ``Spider``).
        headers: always ``None`` here; presumably optional request headers
            read by the spider framework (TODO confirm against ``Spider``).
    """

    # Class-level defaults; ``start_url`` and ``useProxy`` are re-assigned on
    # every instance in ``__init__``.
    start_url = []
    useProxy = True
    headers = None

    def __init__(self):
        # NOTE: this is a generator, so it can be iterated only once.
        self.start_url = self.__generate_url()
        self.useProxy = True

    def __generate_url(self):
        """Yield the listing page URLs (index pages 3 and 4)."""
        for i in range(3, 5):
            yield "http://www.kx1d.com/neihanmanhua/index_%d.html" % i

    def process(self, html):
        """Parse a listing page and yield a ``Request`` for each entry.

        Prints the page title (when present), the number of ``<li>`` entries
        found in the first ``class="liL"`` container, and each entry's title.
        Entries without an ``<a href=...>`` are skipped; a page without the
        container yields nothing.

        Args:
            html: markup of the listing page.

        Yields:
            ``Request`` objects whose callback is ``process_detail``.
        """
        soup = BeautifulSoup(html, 'lxml')

        # Not every document has <head><title>; do not crash on those.
        head = soup.head
        page_title = head.title if head is not None else None
        if page_title is not None:
            print(page_title.text)

        # find() returns the first match or None, avoiding the IndexError
        # that find_all(...)[0] raises when the container is missing.
        container = soup.find(class_="liL")
        elements = container.find_all("li") if container is not None else []
        print("count:", len(elements))

        for element in elements:
            anchor = element.a
            href = anchor.get("href") if anchor is not None else None
            if not href:
                # No usable link in this entry; nothing to request.
                continue
            print("title:" + anchor.get("title", ""))
            url = "http://www.kx1d.com" + href
            request = Request(url=url, callback=self.process_detail)
            # Short pause between generated requests to throttle the crawl.
            time.sleep(0.1)
            yield request

    def process_detail(self, html):
        """Parse an article page and yield one item per image.

        Each direct child of the ``class="articleBody"`` element that
        contains an ``<img>`` with a ``src`` attribute produces an item.
        Text nodes and children without an image are skipped. A page without
        the body element yields nothing; a missing heading gives an empty
        title.

        Args:
            html: markup of the article page.

        Yields:
            A new dict per image with the keys ``_id`` (fresh ``ObjectId``),
            ``src``, ``title`` and ``date`` (local time formatted as
            ``%Y-%m-%d %H:%M:%S``).
        """
        soup = BeautifulSoup(html, 'lxml')

        title_box = soup.find(class_="articleTitle")
        heading = title_box.find("h1") if title_box is not None else None
        title_text = heading.text if heading is not None else ""

        body = soup.find(class_="articleBody")
        if body is None:
            return

        for child in body:
            # Children include text nodes, which have no ``img`` attribute.
            img = getattr(child, "img", None)
            src = img.get("src") if img is not None else None
            if not src:
                continue
            # Build a fresh dict each time: yielding one shared, mutated dict
            # would make every item held by a consumer alias the last image.
            yield {
                "_id": ObjectId(),
                "src": src,
                "title": title_text,
                "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            }


if __name__ == "__main__":
    # Script entry point: build the spider, attach the console and MongoDB
    # pipelines in that order, then start crawling.
    options = dict(multiplethread=True, queueTimeOut=2)
    crawler = Spider(MyProcess(), threadoptions=options)
    # Rebind after every call so the chain follows whatever addPipeline
    # returns, exactly as the fluent form does.
    crawler = crawler.addPipeline(ConsolePipeline())
    crawler = crawler.addPipeline(
        MongoDBPipeline(database="video", collection="spider"))
    crawler.start()
